Plotting function to reduce code length + simplify. Then, short function to place MLS weight smoothing kernel into same category to make visually consistent, updates are:

  • 3.0 –> 3.6
  • 4.0 –> 4.8
  • 5.0 –> 6.0
  • 6.0 –> 7.2
  • 7.0 –> 8.4
create_plot <- function(dataset, y_est, type, thresh, y_min, y_max, y_lab, add_title = TRUE) {
  plot <- dataset %>%
    ggplot(aes(x = .data[[type]], y = .data[[y_est]], fill = .data[[type]], color = .data[[type]])) +
    geom_rain(alpha = .5, rain.side = 'l',
              boxplot.args = list(color = "black", outlier.shape = NA),
              boxplot.args.pos = list(
                position = ggpp::position_dodgenudge(x = .1), width = 0.1
              )) +
    facet_grid(~study) +
    ylab(y_lab) +
    ylim(y_min,y_max)+
    theme_classic() 
  
  if (add_title) {
    plot <- plot + ggtitle(paste("Distribution by", type, "category (", thresh, "mask)"))
  }
  
  plot <- plot +
    scale_fill_manual(values = pal) +
    scale_color_manual(values = pal) +
    guides(fill = 'none', color = 'none') +
    theme(text = element_text(family = "Times New Roman"),
          axis.text = element_text(size = 12, angle = 45, hjust = 1),
          axis.title = element_text(size = 12),
          legend.text = element_text(size = 12),
          legend.title = element_text(size = 12),
          plot.title = element_text(size = 16))
}

by_sample_conmod <- function(dataset, y_est, y_min, y_max, y_lab) {
  
  # Define a function to create individual plots
  create_plot <- function(data, study) {
    ggplot(data, aes(x = con, y = .data[[y_est]], fill = con, color = con)) +
      geom_rain(alpha = .5, rain.side = 'l',
                boxplot.args = list(color = "black", outlier.shape = NA),
                boxplot.args.pos = list(
                  position = ggpp::position_dodgenudge(x = .1), width = 0.1
                )) +
      facet_grid(~model) +
      ylab(y_lab) +
      ylim(y_min,y_max)+
      theme_classic() +
      scale_fill_manual(values = pal) +
      scale_color_manual(values = pal) +
      guides(fill = 'none', color = 'none') +
      theme(text = element_text(family = "Times New Roman"),
            axis.text = element_text(size = 12, angle = 45, hjust = 1),
            axis.title = element_text(size = 12),
            legend.text = element_text(size = 12),
            legend.title = element_text(size = 12),
            plot.title = element_text(size = 16)) +
      ggtitle(study) 
  }
  
  # Create plots for each study using map
  plot_list <- purrr::map(c("mls", "ahrb", "abcd"), ~ dataset %>%
                            filter(study == .x) %>%
                            create_plot(.x))
  
  # Combine plots into a single patchwork object
  combined_plots <- wrap_plots(plotlist = plot_list)
  
  # Add patchwork annotations
  combined_plots 
}

update_mls_fwhm <- function(df) {
  df <- df %>%
    mutate(fwhm = case_when(
      fwhm == "fwhm-3.0" ~ "fwhm-3.6",
      fwhm == "fwhm-4.0" ~ "fwhm-4.8",
      fwhm == "fwhm-5.0" ~ "fwhm-6.0",
      fwhm == "fwhm-6.0" ~ "fwhm-7.2",
      fwhm == "fwhm-7.0" ~ "fwhm-8.4",
      TRUE ~ as.character(fwhm)  # if none of the conditions match, keep the original value
    ))
  return(df)
}

create_specr_plot <- function(summary_df, est_label) {
  plot_a = plot_curve(df = summary_df, ci = TRUE, desc = FALSE, legend = FALSE, null = 0)
  plot_b <- plot_choices(df = summary_df, choices = c("fwhm", "motion","contrast","model"), desc = F, null = 0) +
    labs(y = "Variables", x = paste("Ordered Specification Curve \n",est_label, "coefficient"))
  
  cowplot::plot_grid(plot_a, plot_b, ncol = 1, align = "v", axis = 'tblr',
                     labels = c('A', 'B'), rel_heights = c(1, 2),
                     label_fontfamily = "Times", label_size = 12)
}

calculate_summary_stats <- function(data, variable, est_type) {
  data %>%
    select(study, {{variable}}) %>% 
    group_by(study) %>% 
    summarise(
      "est_type" = est_type,
      "median" = median({{variable}}, na.rm = TRUE),
      "mean" = mean({{variable}}, na.rm = TRUE),
      "sd" = sd({{variable}}, na.rm = TRUE),
      "min" = min({{variable}}, na.rm = TRUE),
      "max" = max({{variable}}, na.rm = TRUE) 
    ) %>% 
  kbl(format = "html", 
      booktabs = TRUE) %>% 
  kable_styling(full_width = FALSE)
  
}

The packages are automatically loaded using pacman. The reported .html was last ran on the system: x86_64-apple-darwin17.0 and R version: R version 4.2.1 (2022-06-23) In the Stage 1 PCI Registered Report we are focused on Individual Continouous (intraclass correlation) and the binary/continuous group similarity (jaccard and spearman). This descriptive file includes the output information for the distribution plots, min/max/mean/median of the estimates and the spec plots for each estimate time. This report is for the Session1 between-run estimates

continuous

We stated:

Aim1: the range and distribution of median ICCs across each study (three) and analytic decision category (four) are plotted across suprathreshold task-positive and subthreshold ICCs using Rainclouds (Allen et al., 2019) and the median and standard deviation is reported in a table.

to visualize the ordered median ICCs across the 360 model permutations for suprathreshold task-positive and subthreshold ICCs, specification curve analyses are used (Simonsohn et al., 2020). Specifically, results across the 360 model permutations are reported using a specification curve to represent the range of estimated effects across the variable permutations. This consists of two panels: Panel A represents the ordered ICC coefficients and the ICC’s associated 95% confidence interval colored based on no significance (gray), negative (red) or positive (blue) significance from the Null (Null here is 0) and Panel B represents the analytic decisions from each of the four categories (see Table 1) that produced the median ICC estimates. In the main text, to compare the highest and lowest ICC’s produced by the model permutations, the 25th percentile and 75th percentile median ICC estimates from the 360 models are reported separately for suprathreshold task-positive and subthreshold activation (the specification curve for all ICC estimates for suprathreshold task-positive and subthreshold activation are provided as supplemental information).

Aim2: the range and distribution of median Between Subject and Within Subject across each study and analytic decision category are plotted across suprathreshold task-positive and subthreshold ICCs using Rainclouds.

two separate specification curve analyses report the ordered median Between Subject and Within Subject coefficients in one panel and the analytic decisions that produced the Between Subject and Within Subject estimates in a second panel separately for suprathreshold task-positive and subthreshold activation

group similarity

We stated:

Aim1: For each study, the coefficients are plotted to reflect the distribution and range of coefficients. Both Jaccard’s and Spearman correlation are reported separately.



1 Load data

1.1 ABCD

1.2 AHRB

1.3 MLS

1.4 combine data

2 Smoothing + T to D

2.1 Inherent Smoothing

smoothing_x = c(1.5,2,2.5,3,3.5)
(smoothing_x*4)
## [1]  6  8 10 12 14
cat((smoothing_x*.5)*4)
## 3 4 5 6 7

reporting the smoothness density across 240 group lvl model residual variance maps for ABCD, AHRB and MLS. Five smoothing kernels were used x voxel-size in 2.4mm for abcd/ahrb, resulting in smoothness for ABCD/AHRB and a weight of .50 was used for MLS 4mm voxel-size resulting in smoothness multiples of rcat((smoothing_x*.5)*4).

SmoothEstimate from FSL was used and Rsel^(1/3) is reported as it is “avg FWHM” based on: https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=FSL;e792b5da.0803

comb_smooth %>% 
  mutate(FWHM_avg = RESEL^(1/3)) %>% 
  ggplot(aes(x=FWHM_avg, fill = sample, colour = sample)) +
  geom_density(alpha = .5) +
  xlab("Avg FWHM") +
  xlim(0,10)+
  ylim(0,.5)+
  scale_fill_manual(values = pal) +  
  scale_colour_manual(values = pal) +
  theme_minimal()

comb_smooth %>% 
  group_by(sample) %>% 
  summarise(mean = mean(RESEL^(1/3)), sd = sd(RESEL^(1/3))) %>% 
  kbl(format = "html", 
      booktabs = TRUE) %>% 
  kable_styling(full_width = FALSE)
sample mean sd
ABCD 4.544900 1.418274
AHRB 4.185944 1.331808
MLS 3.772849 0.957380

2.2 t-stat to cohen’s d

n_range = seq(5, 200, 1)
result_df <- data.frame(n = numeric(0), 
                        t_stat = numeric(0), cohen_d = numeric(0))
for (n in n_range) {
  set.seed(123)
  n <- n
  population_mean <- 0
  mean <- 5
  sd <- 1
  
  # Generate random samples from a t-distribution
  df <- rnorm(n, mean = mean, sd = sd)
  
  
  # Perform a one-sample t-test
  t_test_result <- t.test(df, mu = population_mean)
  t_stat = t_test_result$statistic 
  cohens_d = t_test_result$statistic / sqrt(n)
  result_df <- rbind(result_df, data.frame(n = n, t_stat = t_stat, cohen_d = cohens_d))
}


result_df %>% 
  ggplot(aes(x = n)) +
  geom_line(aes(y = t_stat, color = "T-statistic"), size = .5) +
  geom_line(aes(y = cohen_d, color = "Cohen's d"), size = .5) +
  labs(caption = "Cohen's d = t-stat / sqrt(n)",
       x = "Sample Size (n)",
       y = "Value") +
  scale_color_manual(values = c("T-statistic" = "black", "Cohen's d" = "red")) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

3 Plot distributions

Below, running the steps to summarize the different Intraclass correlation (ICC), Mean Squared Between Subject (Between Subject), Mean Square Within Subject (Within Subject), and jaccard and spearman similarty for the model combinations 360 across samples

3.1 ICC

Plotting overall and for each of [four] categories

3.1.1 Subthreshold Mask

Creating rainclouds via ggrain

subset <- by_sample_conmod(icc_subthresh, y_est = "median_est", y_min =-.1, y_max=.6, y_lab = "Median ICC")
fwhm_rg = create_plot(icc_subthresh, y_est = "median_est", type = "fwhm",  thresh = "sub-threshold", 
                      y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
motion_rg = create_plot(icc_subthresh, y_est = "median_est", type = "motion", thresh = "sub-threshold", 
                        y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
modeltype_rg = create_plot(icc_subthresh, y_est = "median_est", type = "model", thresh = "sub-threshold",
                           y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
contrast_rg = create_plot(icc_subthresh, y_est = "median_est", type = "con", thresh = "sub-threshold", 
                          y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

subset

3.1.2 Suprathreshold Mask

subset <- by_sample_conmod(icc_suprathresh, y_est = "median_est", y_min=-.1, y_max=.6, y_lab = "Median ICC")
fwhm_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "fwhm",  thresh = "supra-threshold", 
                      y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
motion_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "motion", thresh = "supra-threshold", 
                        y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
modeltype_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "model", thresh = "supra-threshold",
                           y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)
contrast_rg = create_plot(icc_suprathresh, y_est = "median_est", type = "con", thresh = "supra-threshold", 
                          y_min=-.1, y_max=.6, y_lab = "Median ICC", add_title=FALSE)


cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.1.3 Nacc avg Mask

3.1.3.2 Left

fwhm_rg = create_plot(icc_nacc, y_est = "avg_left", "fwhm","Avg Left NAcc", y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)
motion_rg = create_plot(icc_nacc, y_est = "avg_left", "motion","Avg Left NAcc", y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)
modeltype_rg = create_plot(icc_nacc, y_est = "avg_left", "model","Avg Left NAcc",y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)
contrast_rg = create_plot(icc_nacc, y_est = "avg_left", "con","Avg Left NAcc", y_min=-.3, y_max=.6, y_lab = "ICC", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for Left NAcc")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for Left NAcc
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

3.2 Between Subject

Plotting overall and for each of [four] categories

3.2.1 Subthreshold Mask

subset <- by_sample_conmod(bs_subthresh, y_est = "median_est", y_min=-.01, y_max=2, y_lab = "Median Between Subject")
fwhm_rg = create_plot(bs_subthresh, y_est = "median_est", type = "fwhm",  thresh = "sub-threshold", 
                      y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)
motion_rg = create_plot(bs_subthresh, y_est = "median_est", type = "motion", thresh = "sub-threshold", 
                        y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)
modeltype_rg = create_plot(bs_subthresh, y_est = "median_est", type = "model", thresh = "sub-threshold",
                           y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)
contrast_rg = create_plot(bs_subthresh, y_est = "median_est", type = "con", thresh = "sub-threshold", 
                          y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).

subset
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).

3.2.2 Suprathreshold Mask

subset <- by_sample_conmod(bs_suprathresh, y_est = "median_est", y_min=-1, y_max=2, y_lab = "Median Between Subject")
fwhm_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "fwhm",  thresh = "supra-threshold", 
                      y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)
motion_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "motion", thresh = "supra-threshold", 
                        y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)
modeltype_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "model", thresh = "supra-threshold",
                           y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)
contrast_rg = create_plot(bs_suprathresh, y_est = "median_est", type = "con", thresh = "supra-threshold", 
                          y_min=-.01, y_max=2, y_lab = "Median Between Subject", add_title=FALSE)


cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).
## Warning: Removed 5 rows containing non-finite values (`stat_half_ydensity()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing missing values (`geom_point_sorted()`).

subset

3.3 Within Subject

Plotting overall and for each of [four] categories

3.3.1 Subthreshold Mask

subset <- by_sample_conmod(ws_subthresh, y_est = "median_est", y_min=0, y_max=7, y_lab = "Median Within Subject")
fwhm_rg = create_plot(ws_subthresh, y_est = "median_est", type = "fwhm",  thresh = "sub-threshold", 
                      y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)
motion_rg = create_plot(ws_subthresh, y_est = "median_est", type = "motion", thresh = "sub-threshold", 
                        y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)
modeltype_rg = create_plot(ws_subthresh, y_est = "median_est", type = "model", thresh = "sub-threshold",
                           y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)
contrast_rg = create_plot(ws_subthresh, y_est = "median_est", type = "con", thresh = "sub-threshold", 
                          y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for sub-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.3.2 Suprathreshold Mask

subset <- by_sample_conmod(ws_suprathresh, y_est = "median_est", y_min=0, y_max=7, y_lab = "Median Within Subject")
fwhm_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "fwhm",  thresh = "supra-threshold", 
                      y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)
motion_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "motion", thresh = "supra-threshold", 
                        y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)
modeltype_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "model", thresh = "supra-threshold",
                           y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)
contrast_rg = create_plot(ws_suprathresh, y_est = "median_est", type = "con", thresh = "supra-threshold", 
                          y_min=0, y_max=7, y_lab = "Median Within Subject", add_title=FALSE)

cat("Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask")
## Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast for supra-threshold mask
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.4 Similarity

3.4.1 jaccard

subset <- by_sample_conmod(similarity_df, y_est = "jaccard", y_min=0, y_max = 1, y_lab = "Jaccard")

fwhm_rg = create_plot(similarity_df, y_est = "jaccard", type = "fwhm", thresh = "Jaccard", 
                      y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)
motion_rg = create_plot(similarity_df, y_est = "jaccard", type = "motion", thresh = "Jaccard", 
                        y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)
modeltype_rg = create_plot(similarity_df, y_est = "jaccard", type = "model", thresh="Jaccrd", 
                           y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)
contrast_rg = create_plot(similarity_df, y_est = "jaccard", type = "con", thresh = "Jaccard", 
                          y_min=0, y_max = 1, y_lab = "Jaccard", add_title=FALSE)

cat("Jaccard: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast")
## Jaccard: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.4.2 spearman

subset <- by_sample_conmod(similarity_df, y_est = "spearman", y_min=0, y_max = 1, y_lab = "Spearman")

fwhm_rg = create_plot(similarity_df, y_est = "spearman", type = "fwhm", thresh = "Spearman", 
                      y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)
motion_rg = create_plot(similarity_df, y_est = "spearman", type = "motion", thresh = "Spearman", 
                        y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)
modeltype_rg = create_plot(similarity_df, y_est = "spearman", type = "model", thresh="Spearman", 
                           y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)
contrast_rg = create_plot(similarity_df, y_est = "spearman", type = "con", thresh = "Spearman", 
                          y_min=0, y_max = 1, y_lab = "Spearman", add_title=FALSE)

cat("Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast")
## Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

3.5 Group Cohen’s ~ ICC

cohens_sim <- gather(similarity_df, key = "Run", value = "spearman", run1_icc_cohensd:run2_icc_cohensd) %>% 
  mutate(Run = case_when(
    Run == "run1_icc_cohensd" ~ "Run1",
    Run == "run2_icc_cohensd" ~ "Run2",
    TRUE ~ Run
  ))
cat("\tICC, Between Subject and Within Subject median, miniumum and maximum")
##  ICC, Between Subject and Within Subject median, miniumum and maximum
calculate_summary_stats(similarity_df, run1_icc_cohensd, "ICC ~ R1 Cohen's D")
study est_type median mean sd min max
abcd ICC ~ R1 Cohen’s D 0.0082622 -0.0501398 0.1887948 -0.4274063 0.2157566
ahrb ICC ~ R1 Cohen’s D 0.1254760 0.0887109 0.2255808 -0.4083736 0.5030614
mls ICC ~ R1 Cohen’s D 0.1099449 0.0802882 0.2146424 -0.3459887 0.4323042
calculate_summary_stats(similarity_df, run2_icc_cohensd, "ICC ~ R2 Cohen's D")
study est_type median mean sd min max
abcd ICC ~ R2 Cohen’s D 0.0405920 -0.0391586 0.2076897 -0.4684577 0.2613830
ahrb ICC ~ R2 Cohen’s D 0.1275412 0.1029176 0.2154479 -0.3957997 0.5103372
mls ICC ~ R2 Cohen’s D 0.1224453 0.0767892 0.2349547 -0.3756960 0.4559185
cohens_sim %>% ggplot(aes(x = fwhm, y = spearman, fill = fwhm, color = fwhm)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by FWHM category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cohens_sim %>% ggplot(aes(x = con, y = spearman, fill = con, color = con)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by Contrast category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cohens_sim %>% ggplot(aes(x = motion, y = spearman, fill = motion, color = motion)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by Motion category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cohens_sim %>% ggplot(aes(x = model, y = spearman, fill = model, color = model)) +
  geom_rain(alpha = .5, rain.side = 'l',
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(
              position = ggpp::position_dodgenudge(x = .2), width = 0.1
            )) +
  facet_grid(~study + Run) +
  theme_classic() +
  ggtitle("Distribution by Model category") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  guides(fill = 'none', color = 'none') +
  theme(text = element_text(family = "Times New Roman"),
        axis.text = element_text(size = 12, angle = 45, hjust = 1),
        axis.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        plot.title = element_text(size = 16))

cat("Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast")
## Spearman: Plotting A = Motion; B = FWHM; C = Model Parameterization, D = Contrast
(motion_rg | fwhm_rg) / (modeltype_rg | contrast_rg) + plot_annotation(tag_levels = c("A","B","C","D")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

3.6 Dev Comp.

3.6.1 Full

older <- icc_suprathresh %>% filter(study %in% c('mls', 'ahrb')) 
younger <- icc_suprathresh %>% filter(study == 'abcd') 

cat("Comparing (x) older samples AHRB/MLS (n = ", nrow(older),")", "versus (y) younger sample ABCD (n = ", nrow(younger),")")
## Comparing (x) older samples AHRB/MLS (n =  480 ) versus (y) younger sample ABCD (n =  240 )
t.test(older$median_est, younger$median_est)
## 
##  Welch Two Sample t-test
## 
## data:  older$median_est and younger$median_est
## t = 5.5391, df = 497.19, p-value = 4.935e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03531633 0.07414200
## sample estimates:
## mean of x mean of y 
## 0.2026375 0.1479083
effectsize::cohens_d(older$median_est, younger$median_est, pooled = TRUE)
## Cohen's d |       95% CI
## ------------------------
## 0.43      | [0.27, 0.59]
## 
## - Estimated using pooled SD.

3.6.2 FWHM

filtered_df <- icc_suprathresh %>%
  filter(motion == 'opt1' & model == 'CueMod' & con == 'LgainNeut')

older <- filtered_df %>% filter(study %in% c('mls', 'ahrb')) 
younger <- filtered_df %>% filter(study == 'abcd') 

cat("Comparing (x) older samples AHRB/MLS (n = ", nrow(older),")", "versus (y) younger sample ABCD (n = ", nrow(younger),")")
## Comparing (x) older samples AHRB/MLS (n =  10 ) versus (y) younger sample ABCD (n =  5 )
t.test(older$median_est, younger$median_est)
## 
##  Welch Two Sample t-test
## 
## data:  older$median_est and younger$median_est
## t = 8.7947, df = 12.823, p-value = 8.651e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.07246049 0.11973951
## sample estimates:
## mean of x mean of y 
##    0.1965    0.1004
effectsize::cohens_d(older$median_est, younger$median_est, pooled = TRUE)
## Cohen's d |       95% CI
## ------------------------
## 3.73      | [1.93, 5.48]
## 
## - Estimated using pooled SD.

3.6.3 Model Parameterization

filtered_df <- icc_suprathresh %>%
  filter(motion == 'opt1' & fwhm == '4.8' & con == 'LgainNeut')

# Separate the fwhm values based on study groups
older <- filtered_df %>% filter(study %in% c('mls', 'ahrb')) 
younger <- filtered_df %>% filter(study == 'abcd') 

cat("Comparing (x) older samples AHRB/MLS (n = ", nrow(older),")", "versus (y) younger sample ABCD (n = ", nrow(younger),")")
## Comparing (x) older samples AHRB/MLS (n =  6 ) versus (y) younger sample ABCD (n =  3 )
t.test(older$median_est, younger$median_est)
## 
##  Welch Two Sample t-test
## 
## data:  older$median_est and younger$median_est
## t = 4.6053, df = 5.0865, p-value = 0.005569
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.03201629 0.11198371
## sample estimates:
##  mean of x  mean of y 
## 0.16633333 0.09433333
effectsize::cohens_d(older$median_est, younger$median_est, pooled = TRUE)
## Cohen's d |       95% CI
## ------------------------
## 2.23      | [0.39, 3.99]
## 
## - Estimated using pooled SD.

3.6.4 Contrasts

filtered_df <- icc_suprathresh %>%
  filter(con == 'LgainNeut' & fwhm == '3.6' & model == 'FixMod')

# Separate the fwhm values based on study groups
older <- filtered_df %>% filter(study %in% c('mls', 'ahrb')) 
younger <- filtered_df %>% filter(study == 'abcd') 

cat("Comparing (x) older samples AHRB/MLS (n = ", nrow(older),")", "versus (y) younger sample ABCD (n = ", nrow(younger),")")
## Comparing (x) older samples AHRB/MLS (n =  8 ) versus (y) younger sample ABCD (n =  4 )
t.test(older$median_est, younger$median_est)
## 
##  Welch Two Sample t-test
## 
## data:  older$median_est and younger$median_est
## t = 2.4819, df = 9.3382, p-value = 0.03399
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.003414 0.069586
## sample estimates:
## mean of x mean of y 
##    0.1035    0.0670
effectsize::cohens_d(older$median_est, younger$median_est, pooled = TRUE)
## Cohen's d |        95% CI
## -------------------------
## 1.13      | [-0.19, 2.40]
## 
## - Estimated using pooled SD.

3.6.5 Motion

filtered_df <- icc_suprathresh %>%
  filter(con == 'LgainNeut' & fwhm == '3.6' & model == 'FixMod')

# Separate the fwhm values based on study groups
older <- filtered_df %>% filter(study %in% c('mls', 'ahrb')) 
younger <- filtered_df %>% filter(study == 'abcd') 

cat("Comparing (x) older samples AHRB/MLS (n = ", nrow(older),")", "versus (y) younger sample ABCD (n = ", nrow(younger),")")
## Comparing (x) older samples AHRB/MLS (n =  8 ) versus (y) younger sample ABCD (n =  4 )
t.test(older$median_est, younger$median_est)
## 
##  Welch Two Sample t-test
## 
## data:  older$median_est and younger$median_est
## t = 2.4819, df = 9.3382, p-value = 0.03399
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.003414 0.069586
## sample estimates:
## mean of x mean of y 
##    0.1035    0.0670
effectsize::cohens_d(older$median_est, younger$median_est, pooled = TRUE)
## Cohen's d |        95% CI
## -------------------------
## 1.13      | [-0.19, 2.40]
## 
## - Estimated using pooled SD.

3.7 Model Effieincy, ses-1

subset <- by_sample_conmod(efficiency_df, y_est = "eff_est", y_min = 0, y_max = 15, y_lab = "Efficiency Estimate")
motion_rg = create_plot(efficiency_df, y_est = "eff_est", "motion","Efficiency", 
                        y_min = 0, y_max = 15, y_lab = "Efficiency Estimate", add_title=FALSE)
modeltype_rg = create_plot(efficiency_df, y_est = "eff_est", "model","Efficiency", 
                           y_min = 0, y_max = 15, y_lab = "Efficiency Estimate", add_title=FALSE)
contrast_rg = create_plot(efficiency_df, y_est = "eff_est", "con","Efficiency", 
                          y_min = 0, y_max = 15, y_lab = "Efficiency Estimate",  add_title=FALSE)


cat("Plotting A = Motion; B = Model Parameterization, C = Contrast for Efficiency")
## Plotting A = Motion; B = Model Parameterization, C = Contrast for Efficiency
(motion_rg) / (modeltype_rg + contrast_rg) + plot_annotation(tag_levels = c("A","B","C")) & theme(plot.tag = element_text(size = 30, face = "bold")) 

subset

4 Med/Min/Max Across Models

`

4.1 Subtreshold

cat("Subthreshold mask")
## Subthreshold mask
cat("\tICC, Between Subject and Within Subject median, miniumum and maximum")
##  ICC, Between Subject and Within Subject median, miniumum and maximum
calculate_summary_stats(icc_subthresh, median_est, "ICC")
study est_type median mean sd min max
abcd ICC 0.0860 0.1139458 0.0953218 -0.053 0.357
ahrb ICC 0.1330 0.1507958 0.0922088 0.028 0.389
mls ICC 0.1185 0.1458833 0.1038028 0.026 0.470
calculate_summary_stats(bs_subthresh, median_est, "Between Subject")
study est_type median mean sd min max
abcd Between Subject 0.061 0.1145958 0.1529663 -0.020 0.874
ahrb Between Subject 0.041 0.0775375 0.0904222 0.003 0.463
mls Between Subject 0.089 0.2606667 0.3418946 0.003 1.323
calculate_summary_stats(ws_subthresh, median_est, "Within Subject")
study est_type median mean sd min max
abcd Within Subject 0.6080 0.7138375 0.4403160 0.083 2.162
ahrb Within Subject 0.3125 0.3858417 0.2514988 0.052 1.303
mls Within Subject 0.8755 1.0090792 0.7271135 0.081 3.261

4.2 Suprathreshold

cat("Suprathreshold mask")
## Suprathreshold mask
cat("\tICC, Between Subject and Within Subject median, miniumum and maximum")
##  ICC, Between Subject and Within Subject median, miniumum and maximum
calculate_summary_stats(icc_suprathresh, median_est, "ICC")
study est_type median mean sd min max
abcd ICC 0.1065 0.1479083 0.1231146 -0.071 0.433
ahrb ICC 0.1760 0.1991250 0.1281665 -0.001 0.522
mls ICC 0.1750 0.2061500 0.1292663 0.041 0.550
calculate_summary_stats(bs_suprathresh, median_est, "Between Subject")
study est_type median mean sd min max
abcd Between Subject 0.0565 0.1013292 0.1254195 -0.020 0.679
ahrb Between Subject 0.0390 0.0746083 0.0876726 0.000 0.418
mls Between Subject 0.0965 0.2349458 0.2878566 0.004 1.210
calculate_summary_stats(ws_suprathresh, median_est, "Within Subject")
study est_type median mean sd min max
abcd Within Subject 0.5025 0.5991125 0.3878716 0.063 2.037
ahrb Within Subject 0.2130 0.2539833 0.1539343 0.038 0.805
mls Within Subject 0.5225 0.6065208 0.4380175 0.047 1.908

4.3 simlarity

cat("Similarity median, minimum and maximum for jaccard and spearman")
## Similarity median, minimum and maximum for jaccard and spearman
calculate_summary_stats(similarity_df, jaccard, "Jaccard")
study est_type median mean sd min max
abcd Jaccard 0.0862590 0.1143020 0.0922401 0.0074734 0.4557094
ahrb Jaccard 0.1799773 0.2119861 0.1530871 0.0089624 0.6420259
mls Jaccard 0.3412880 0.3433006 0.1118254 0.1470588 0.6018436
calculate_summary_stats(similarity_df, spearman, "Spearman")
study est_type median mean sd min max
abcd Spearman 0.6751975 0.6754710 0.1382008 0.3536593 0.8946310
ahrb Spearman 0.7323860 0.6835125 0.2169564 0.2208462 0.9560855
mls Spearman 0.8449316 0.8024856 0.1193737 0.4708645 0.9537408

4.4 NAcc

cat("\tICC Right and Left avg ICC, miniumum and maximum")
##  ICC Right and Left avg ICC, miniumum and maximum
calculate_summary_stats(icc_nacc, avg_right, "Right avg ICC")
study est_type median mean sd min max
abcd Right avg ICC 0.0705 0.0837542 0.0741697 -0.042 0.316
ahrb Right avg ICC 0.0255 0.0414333 0.1061659 -0.251 0.419
mls Right avg ICC 0.1025 0.1074417 0.0998838 -0.072 0.402
calculate_summary_stats(icc_nacc, avg_left, "Left avg ICC")
study est_type median mean sd min max
abcd Left avg ICC 0.086 0.0889083 0.0821103 -0.061 0.319
ahrb Left avg ICC 0.136 0.1067750 0.1235957 -0.233 0.455
mls Left avg ICC 0.167 0.1731958 0.0878092 0.027 0.442

5 Specification Curve

Creating data in a format that is compatible with specr. Needs: estimate (i.e., ICC), std.error, conf.high, conf.low.

5.1 ICC

5.1.1 suprathreshold

5.1.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

# first, combine independent model vars into string to create average for each model type
icc_suprathresh$model_type <- paste(icc_suprathresh$fwhm, icc_suprathresh$motion,
                                    icc_suprathresh$con,  icc_suprathresh$model,
                                    sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- icc_suprathresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "supra-threshold ICC"
create_specr_plot(df_summ, est_label)

5.1.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "supra-threshold ICC"
create_specr_plot(df_summ_subset, est_label)

5.1.2 subthreshold

5.1.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

icc_subthresh$model_type <- paste(icc_subthresh$fwhm, icc_subthresh$motion,
                                  icc_subthresh$con,  icc_subthresh$model,
                                  sep = "_")

df_summ <- icc_subthresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "sub-threshold ICC"
create_specr_plot(df_summ, est_label)

5.1.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "sub-threshold ICC"
create_specr_plot(df_summ_subset, est_label)

5.1.3 NAcc Avg

5.1.3.1 Left NAcc

creating combined panel 1 and panel 2 for all model permutations first.

# first, combine independent model vars into string to create average for each model type
icc_nacc$model_type <- paste(icc_nacc$fwhm, icc_nacc$motion,
                             icc_nacc$con,  icc_nacc$model,
                             sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- icc_nacc %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(avg_left), std.error = sd(avg_left)/sqrt(length(avg_left))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Left NAcc ICC"
create_specr_plot(df_summ, est_label)

estimate variability across samples

# median + variable for top
icc_nacc %>% 
  filter(model_type=='8.4_opt1_SgainBase_CueMod' | model_type=='8.4_opt1_LgainBase_CueMod' | model_type=='8.4_opt1_LgainNeut_CueMod')
##         con motion  model fwhm avg_left avg_right study
## 1 LgainBase   opt1 CueMod  8.4    0.128     0.121  abcd
## 2 LgainNeut   opt1 CueMod  8.4    0.200     0.112  abcd
## 3 SgainBase   opt1 CueMod  8.4    0.216     0.223  abcd
## 4 LgainBase   opt1 CueMod  8.4    0.177     0.236  ahrb
## 5 LgainNeut   opt1 CueMod  8.4    0.136     0.050  ahrb
## 6 SgainBase   opt1 CueMod  8.4    0.455     0.419  ahrb
## 7 LgainBase   opt1 CueMod  8.4    0.406     0.291   mls
## 8 LgainNeut   opt1 CueMod  8.4    0.233     0.167   mls
## 9 SgainBase   opt1 CueMod  8.4    0.419     0.290   mls
##                  model_type
## 1 8.4_opt1_LgainBase_CueMod
## 2 8.4_opt1_LgainNeut_CueMod
## 3 8.4_opt1_SgainBase_CueMod
## 4 8.4_opt1_LgainBase_CueMod
## 5 8.4_opt1_LgainNeut_CueMod
## 6 8.4_opt1_SgainBase_CueMod
## 7 8.4_opt1_LgainBase_CueMod
## 8 8.4_opt1_LgainNeut_CueMod
## 9 8.4_opt1_SgainBase_CueMod

5.1.3.2 Right NAcc

creating combined panel 1 and panel 2 for all model permutations first.

# first, combine independent model vars into string to create average for each model type
icc_nacc$model_type <- paste(icc_nacc$fwhm, icc_nacc$motion,
                             icc_nacc$con,  icc_nacc$model,
                             sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- icc_nacc %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(avg_right), std.error = sd(avg_right)/sqrt(length(avg_right))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Right NAcc ICC"
create_specr_plot(df_summ, est_label)

5.2 Between Subject

5.2.1 suprathreshold

5.2.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

bs_suprathresh$model_type <- paste(bs_suprathresh$fwhm, bs_suprathresh$motion,
                                   bs_suprathresh$con,  bs_suprathresh$model,
                                   sep = "_")

df_summ <- bs_suprathresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "supra-threshold Between Subject"
create_specr_plot(df_summ, est_label)

5.2.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "supra-threshold Between Subject"
create_specr_plot(df_summ_subset, est_label)

5.2.2 subthreshold

5.2.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

bs_subthresh$model_type <- paste(bs_subthresh$fwhm, bs_subthresh$motion,
                                 bs_subthresh$con,  bs_subthresh$model,
                                 sep = "_")


df_summ <- bs_subthresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "sub-threshold Between Subject"
create_specr_plot(df_summ, est_label)

5.2.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "suprathreshold Between Subject"
create_specr_plot(df_summ_subset, est_label)

5.3 Within Subject

5.3.1 suprathreshold

5.3.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

ws_suprathresh$model_type <- paste(ws_suprathresh$fwhm, ws_suprathresh$motion,
                                   bs_suprathresh$con,  ws_suprathresh$model,
                                   sep = "_")

df_summ <- ws_suprathresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "supra-threshold Within Subject"
create_specr_plot(df_summ, est_label)

5.3.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "supra-threshold Within Subject"
create_specr_plot(df_summ_subset, est_label)

5.3.2 subthreshold

5.3.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

ws_subthresh$model_type <- paste(ws_subthresh$fwhm, ws_subthresh$motion,
                                 ws_subthresh$con,  ws_subthresh$model,
                                 sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- ws_subthresh %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(median_est), std.error = sd(median_est)/sqrt(length(median_est))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "sub-threshold Within Subject"
create_specr_plot(df_summ, est_label)

5.3.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subplot
est_label = "sub-threshold Within Subject"
create_specr_plot(df_summ_subset, est_label)

5.4 Similarity

5.4.1 Jaccard

5.4.1.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

similarity_df$model_type <- paste(similarity_df$fwhm, similarity_df$motion,
                                  similarity_df$con, similarity_df$model,
                                  sep = "_")

df_summ <- similarity_df %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(jaccard), std.error = sd(jaccard)/sqrt(length(jaccard))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Jaccard"
create_specr_plot(df_summ, est_label)

5.4.1.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "Jaccard"
create_specr_plot(df_summ_subset, est_label)

5.4.2 Spearman

5.4.2.1 All models

creating combined panel 1 and panel 2 for all model permutations first.

similarity_df$model_type <- paste(similarity_df$fwhm, similarity_df$motion,
                                  similarity_df$con,  similarity_df$model, 
                                  sep = "_")

# calculate the avg estimate of ICC across study, standard error and +/- 95% confidence interval. In complete version
df_summ <- similarity_df %>% 
  group_by(model_type) %>% 
  summarise(estimate = mean(spearman), std.error = sd(spearman)/sqrt(length(spearman))) %>% 
  mutate(conf.low = estimate - 1.96 * std.error, conf.high = estimate + 1.96 * std.error) %>% 
  separate(col = model_type, into = c("fwhm","motion","contrast","model"), sep = "_|-", remove = FALSE)

# plot
est_label = "Spearman"
create_specr_plot(df_summ, est_label)

5.4.2.2 subset models

Creating model that is subset to visualize reliability for the top (>75th) and bottom (< 25th) quartile

# get 75th/25th qunatiles
top_75q = as.numeric(quantile(df_summ$estimate, .75))
bot_25q = as.numeric(quantile(df_summ$estimate, .25))
df_summ_subset = df_summ %>% 
  filter(estimate < bot_25q | estimate > top_75q)

# subset plots
est_label = "Spearman"
create_specr_plot(df_summ_subset, est_label)

6 ICC parameter subsampling

We state:

Aim 3: evaluates the sample size at which the ICC stabilizes. The chosen pipeline will be based on the highest median ICC across the studies for the suprathreshold task-positive mask from Aim 1a that will be rerun for the ABCD sample. Based on this pipeline, the first-level analysis steps are repeated for N = 525 from the N = 2000 subsample for only the ABCD data. Then, voxelwise_icc within the brain_icc.py script is used to derive estimates of the median ICC, Between Subject and Within Subject for the between runs (e.g., measurement occasions) reliability across randomly sampled subjects for 25 to 525 subjects in intervals of 50. Similar to the methods in Liu et al. (2023), 100 iterations are performed at each N (with replacement) and the median ICC, the associated Between Subject and Within Subject estimates are retained from voxelwise_icc. The average and standard error across the 100 iterations is plotted for each interval of N with the y-axis representing the ICC and x-axis representing N. The plotted values will be used to infer change and stability in the estimated median ICCs and variance components across the sample size. If stability is not achieved by N = 500, the sample is extended to N = 1,000 and the analyses are repeated.

icc_subsamp_lg <- abcd_icc_subsamp %>%
  gather(key, value, starts_with(c("icc_", "msbtwn_", "mswthn_"))) %>%
  separate(key, into = c("variable", "mask"), sep = "_") %>%
  spread(variable, value)
n_range = seq(25,525,50)

6.1 ICC

icc_summary <- icc_subsamp_lg %>%
  group_by(mask,subsample_n) %>%
  summarize(mean_icc = mean(icc),
            lower_ci = quantile(icc, 0.025),
            upper_ci = quantile(icc, 0.975))
## `summarise()` has grouped output by 'mask'. You can override using the
## `.groups` argument.
ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_subsamp_lg, aes(x = subsample_n, y = icc, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = icc_summary, aes(x = subsample_n, y = mean_icc), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = icc_summary, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = icc_summary, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = icc_summary, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of ICC point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median ICC") +
  ylim(0, 0.75) +
  facet_wrap(~ mask) +
  theme_minimal()

icc_supra <- icc_subsamp_lg %>%
  filter(mask=="supra") %>% 
  group_by(subsample_n) %>%
  summarize(mean_icc = mean(icc),
            lower_ci = quantile(icc, 0.025),
            upper_ci = quantile(icc, 0.975))

icc_supra_lg = icc_subsamp_lg %>% 
  filter(mask=="supra")

ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_supra_lg, aes(x = subsample_n, y = icc, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = icc_supra, aes(x = subsample_n, y = mean_icc), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = icc_supra, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = icc_supra, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = icc_supra, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of ICC point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_supra_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_supra_lg %>% filter(subsample_n == 25) %>% pull(icc)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_supra_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2),
                        " Min:", 
                        round(min(icc_supra_lg %>% filter(subsample_n == 525) %>% pull(icc)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median ICC") +
  ylim(-.1, 1) +
  theme_minimal()

6.2 Between Subject

between_sub <- icc_subsamp_lg %>%
  group_by(mask,subsample_n) %>%
  summarize(mean_msbthn = mean(msbtwn),
            lower_ci = quantile(msbtwn, 0.025),
            upper_ci = quantile(msbtwn, 0.975))
## `summarise()` has grouped output by 'mask'. You can override using the
## `.groups` argument.
ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_subsamp_lg, aes(x = subsample_n, y = msbtwn, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = between_sub, aes(x = subsample_n, y = mean_msbthn), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = between_sub, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = between_sub, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = between_sub, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of Between Subject point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median BS") +
  ylim(-0.1, 1) +
  facet_wrap(~ mask) +
  theme_minimal()

6.3 Within Subject

within_subject <- icc_subsamp_lg %>%
  group_by(mask,subsample_n) %>%
  summarize(mean_mswthn = mean(mswthn),
            lower_ci = quantile(mswthn, 0.025),
            upper_ci = quantile(mswthn, 0.975))
## `summarise()` has grouped output by 'mask'. You can override using the
## `.groups` argument.
ggplot() +
  # add individual lines of ICC fo subsample_n + add mean
  geom_path(data = icc_subsamp_lg, aes(x = subsample_n, y = mswthn, group = as.factor(seed)),
            alpha = 0.1) +  # Use geom_path to connect individual points
  geom_line(data = within_subject, aes(x = subsample_n, y = mean_mswthn), alpha = .5) + 
  # create ribbon + 95CI upper/lower
  geom_ribbon(data = within_subject, aes(x = subsample_n, ymin = lower_ci, ymax = upper_ci), 
              alpha = 0.1) +  
  geom_line(data = within_subject, aes(x = subsample_n, y = lower_ci), linetype = "dashed", color = "red") +
  geom_line(data = within_subject, aes(x = subsample_n, y = upper_ci), linetype = "dashed", color = "red") +  
  labs(title = "Individual points, Mean & +/-95% CI of Within Subject point estimate across 100 bootstraps at each N", 
       subtitle = paste0("N=25 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 25) %>% pull(msbtwn)), 2),
                        ") N=525 (Max:", 
                        round(max(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2),
                        " Min:", 
                        round(min(icc_subsamp_lg %>% filter(subsample_n == 525) %>% pull(msbtwn)), 2)
                        ,")"),
       caption = paste("N range:",
                       paste(n_range, collapse = ", "),
                       "\n100 random iterations per N"),
       x = "", y = "Median WS") +
  ylim(-.1, 2) +
  facet_wrap(~ mask) +
  theme_minimal()